######################
#  Replication code for 'Mediating the Electoral Connection', forthcoming in the JOP
#  John Henderson and John Brooks
#  12/7/2015    
######################    

# prelimRobust.R
#  :: loads functions and data, and performs some basic data preparation tasks for most robustness results
                         
if(length(which(installed.packages()[,1]=='AER'))!=1){
	install.packages('AER')                        
}   

if(length(which(installed.packages()[,1]=='stringr'))!=1){
	install.packages('stringr')                        
}

library(AER)  
library(stringr)        

# is.even: elementwise test for even numbers
#   x: numeric vector
#   returns a logical vector, TRUE where x leaves no remainder on division by 2
#   (NA where x is NA)
is.even <- function(x) {
  remainder <- x %% 2
  remainder == 0
}
source('coeftest.cluster.R')      
            
##############
# Run-time switches
#  Every switch below receives its default only when no object of that name
#  already exists in the environment this file is evaluated in, so a value
#  assigned before this file runs is kept.

##############
# <fes.type>
#  controls the fixed effects and cluster units used
#   1. decade x district  (rain_data$decade_district)
#   2. decade x icpsr     (rain_data$decade_icpsr)
#   3. icpsr              (rain_data$icpsr)
#  NOTE: only fes.type = 2 and 3 are called, since decade x district has considerable error

if (!exists('fes.type', inherits = FALSE)) {
  fes.type <- 3
}

##############
# <non.missings>
#  controls certain parameters regarding prior outcomes and the use of redistricting
#   0. original data -- no redistricting years, including no prior outcomes for redistricting cycles
#      (lagged values are looked up through rain_data$ndex)
#   4. use icpsr link to measure prior election data, w/ redistricting years included
#      (lagged values are looked up through rain_data$prior_obs)

if (!exists('non.missings', inherits = FALSE)) {
  non.missings <- 4
}

##############
# <nominates>
#  controls whether unbridged nominate outcomes are used as a robustness check
#   TRUE or FALSE

if (!exists('nominates', inherits = FALSE)) {
  nominates <- FALSE
}

##############
# <ghk>
#  controls whether the gomez, hansford and krause rain measure is used as a robustness check
#   TRUE or FALSE

if (!exists('ghk', inherits = FALSE)) {
  ghk <- FALSE
}

##############
# <seniority>
#  controls the indicator to stratify on levels of seniority, primarily for slider and main results by terms in office
#   -1: don't run/stratify the seniority results
#    1: freshman
#    2: sophomore
#    3: senior
#    4: freshman + sophomore
#  NOTE(review): the stratification code compares these values directly with
#  the running term count (`terms`), and only the first element decides which
#  branch runs; confirm that the labels for 3 and 4 match that usage.

if (!exists('seniority', inherits = FALSE)) {
  seniority <- -1
}

##############
# <or_pre>
#  restricts the sample to oregon on one side of the 2000 vote by mail cutoff
#   -1 don't run/stratify by oregon adoption
#    0 keeps oregon observations with year >= 2000
#    1 keeps oregon observations with year <  2000
#  NOTE(review): this header previously listed 0 as "before 2000" and 1 as
#  "after 2000"; the values above describe what the stratification code
#  actually does. Confirm which reading was intended.

if (!exists('or_pre', inherits = FALSE)) {
  or_pre <- -1
}

##############
# <a1,a3> <early,absentee>
#  controls the early voting and absentee voting stratifications
#   -1 don't run/stratify
#    0 drops observations whose *_noexcuse indicator equals 0
#    1 drops observations whose *_noexcuse indicator equals 1
#  NOTE(review): this header previously read "1 states w early/absentee
#  voting"; the stratification code EXCLUDES the rows whose indicator equals
#  the switch value. Confirm which reading was intended.

if (!exists('early', inherits = FALSE)) {
  early <- -1
}
if (!exists('absentee', inherits = FALSE)) {
  absentee <- -1
}

##############
# <battle>
#  controls stratification on presidential battleground states
#   -1 don't run/stratify
#    0 keeps observations outside battleground states
#    1 keeps observations in pres battleground states

if (!exists('battle', inherits = FALSE)) {
  battle <- -1
}
		
# load data
#  the .Rdata file is expected to provide rain_data and the covariate tables
#  covs_all, covs_some and covs_none used further down -- TODO confirm contents
load(file = 'FinalRainData.Rdata')

# working covariate table: imputed covariates w/o imputing outcomes
covs <- covs_some
               
#############################################################
# reset outcomes to reflect appropriate priors
#  When <nominates> is TRUE, the roll-call outcomes y_inc, y_shr and y_prv in
#  rain_data are overwritten with scores looked up from the nominate/ files.

if (nominates == TRUE) {

  # estimated using <nominate> package in R
  #  each .Rdata file is expected to define an object called `nominate`;
  #  a copy is kept per file before the next load overwrites it
  load('nominate/irt_nominate-1.Rdata')
  w1 <- nominate
  load('nominate/irt_nominate-2.Rdata')
  w2 <- nominate
  load('nominate/irt_nominate-3.Rdata')
  w3 <- nominate
  load('nominate/irt_nominate-4.Rdata')
  w4 <- nominate
  # fixed: the last element used to be the literal 4 rather than w4
  nominate <- c(w1, w2, w3, w4)

  # fixed-width fields read off the per-congress table
  #  (character positions 1-5, 6-10 and 42-47 of each line)
  nom_tab <- read.table('nominate/nominate_per_congress.txt', sep = '\t', stringsAsFactors = FALSE)
  nom_cong <- as.numeric(str_sub(nom_tab[,], 1, 5))
  nom_icpsr <- as.numeric(str_sub(nom_tab[,], 6, 10))
  nom_dw1 <- as.numeric(str_sub(nom_tab[,], 42, 47))

  w_noms <- array(NA, nrow(rain_data))
  for (i in seq_along(w_noms)) {

    if (!is.na(rain_data$icpsr[i])) {

      k <- rain_data$cong[i]

      # choose the set of estimates covering congress k; for k above 111
      # the selection made on an earlier iteration is left in place
      if (k <= 86) {
        nominate <- w1
      } else if (k <= 94) {
        nominate <- w2
      } else if (k <= 103) {
        nominate <- w3
      } else if (k <= 111) {
        nominate <- w4
      }

      # presumably column 1 of nominate[[k]] holds icpsr ids and column 2 the
      # score -- TODO confirm against the nominate files
      ix <- which(rain_data$icpsr[i] == nominate[[k]][, 1])
      if (length(ix) > 0) {
        w_noms[i] <- nominate[[k]][ix, 2]
      }
    }
  }

  # row lookup used for the lagged outcome, chosen by <non.missings>
  if (non.missings == 0) {
    ndex <- rain_data$ndex
  } else if (non.missings == 4) {
    ndex <- rain_data$prior_obs
  }

  # missingness of the original outcomes, recorded before they are replaced
  nas_inc <- is.na(rain_data$y_inc)
  nas_prv <- is.na(rain_data$y_prv)
  nas_shr <- is.na(rain_data$y_shr)

  rain_data$y_inc <- w_noms
  rain_data$y_shr <- w_noms
  rain_data$y_prv <- w_noms[ndex]
  rm(ndex)
}

       
# non.missings == 0: original data w/o redistricting years
#  lagged values are looked up through rain_data$ndex and leads through
#  rain_data$nedex; prior outcomes come from the *_prv columns of rain_data
#  NOTE: the order in which columns are added to covs is kept fixed, because
#  covs is later indexed by column position
if (non.missings == 0) {

  # election outcomes
  dem_shr <- rain_data$dem_shr
  rep_shr <- rain_data$rep_shr
  dem_prv <- rain_data$dem_prv
  rep_prv <- rain_data$rep_prv

  # dose = democratic share minus republican share, current and prior
  dose <- dem_shr - rep_shr
  dose_prv <- dem_prv - rep_prv

  # roll call outcomes
  vote <- rain_data$y_inc
  vote_prv <- rain_data$y_prv

  covs$dose <- dose
  covs$vote <- vote
  covs$dose_prv <- dose_prv
  covs$vote_prv <- vote_prv

  covs$turnout <- rain_data$turnout
  covs$turnout_prv <- rain_data$turnout_prv

  covs$extremist <- rain_data$extremist
  covs$inc_win <- rain_data$inc_win
  covs$inc_win_prv <- rain_data$inc_win_prv

  # lagged rain and next rain updates
  #  -- using icpsr for previous baseline

  covs$rain_elec <- rain_data$rain_elec

  # one lag
  covs$rain_elec_prev <- rain_data$rain_elec[rain_data$ndex]
  covs$rain_day_prev <- rain_data$rain_day[rain_data$ndex]
  covs$rain_weekend_prev <- rain_data$rain_weekend[rain_data$ndex]

  # the lag of rain_data's own *_prev columns
  covs$rain_day_prev_prv <- rain_data$rain_day_prev[rain_data$ndex]
  covs$rain_elec_prev_prv <- rain_data$rain_elec_prev[rain_data$ndex]
  covs$rain_weekend_prev_prv <- rain_data$rain_weekend_prev[rain_data$ndex]

  # one lead
  covs$rain_elec_next <- rain_data$rain_elec[rain_data$nedex]
  covs$rain_day_next <- rain_data$rain_day[rain_data$nedex]
  covs$rain_weekend_next <- rain_data$rain_weekend[rain_data$nedex]

  # alternative rain measures, only when <ghk> is switched on
  if (ghk == TRUE) {
    covs$rain_elec00 <- rain_data$rain_elec00
    covs$rain_elec00_prev <- rain_data$rain_elec00[rain_data$ndex]
    covs$rain_ghk <- rain_data$RD_ghk_rain_dev
    covs$rain_ghk_prev <- covs$rain_ghk[rain_data$ndex]
  }

}

# non.missings == 4: current data using redistricting years + prior outcomes as controls
#  lagged values are looked up through rain_data$prior_obs and leads through
#  rain_data$next_obs
#  NOTE: the order in which columns are added to covs is kept fixed, because
#  covs is later indexed by column position
if (non.missings == 4) {

  dem_shr <- rain_data$dem_shr
  rep_shr <- rain_data$rep_shr

  # prior shares are the current shares of the linked prior observation
  dem_prv <- dem_shr[rain_data$prior_obs]
  rep_prv <- rep_shr[rain_data$prior_obs]

  # dose = democratic share minus republican share, current and prior
  dose <- dem_shr - rep_shr
  dose_prv <- dem_prv - rep_prv

  vote <- rain_data$y_shr
  vote_prv <- vote[rain_data$prior_obs]

  # rows flagged inc == 1 & win == 0 (presumably defeated incumbents) take
  # the prior roll-call outcome in place of the current one
  ix <- which(rain_data$d_inc == 1 & rain_data$d_win == 0)
  vote[ix] <- vote_prv[ix]
  ix <- which(rain_data$r_inc == 1 & rain_data$r_win == 0)
  vote[ix] <- vote_prv[ix]

  # blank out dose (current and prior) wherever the sign of the margin
  # disagrees with the recorded win flag of the incumbent's party
  ix <- which(rain_data$d_inc == 1 & rain_data$d_win == 0 & dose > 0)
  dose[ix] <- NA
  dose_prv[ix] <- NA
  ix <- which(rain_data$d_inc == 1 & rain_data$d_win == 1 & dose < 0)
  dose[ix] <- NA
  dose_prv[ix] <- NA
  ix <- which(rain_data$r_inc == 1 & rain_data$r_win == 0 & dose < 0)
  dose[ix] <- NA
  dose_prv[ix] <- NA
  ix <- which(rain_data$r_inc == 1 & rain_data$r_win == 1 & dose > 0)
  dose[ix] <- NA
  dose_prv[ix] <- NA

  covs$dose <- dose
  covs$vote <- vote
  covs$dose_prv <- dose_prv
  covs$vote_prv <- vote_prv

  covs$turnout <- rain_data$turnout
  covs$turnout_prv <- rain_data$turnout_prv

  covs$extremist <- rain_data$extremist
  covs$inc_win <- rain_data$inc_win
  covs$inc_win_prv <- rain_data$inc_win_prv

  # lagged rain and next rain updates
  #  -- using icpsr for previous baseline
  covs$rain_elec <- rain_data$rain_elec

  # one lag
  covs$rain_elec_prev <- rain_data$rain_elec[rain_data$prior_obs]
  covs$rain_day_prev <- rain_data$rain_day[rain_data$prior_obs]
  covs$rain_weekend_prev <- rain_data$rain_weekend[rain_data$prior_obs]

  # the lag of rain_data's own *_prev columns
  covs$rain_day_prev_prv <- rain_data$rain_day_prev[rain_data$prior_obs]
  covs$rain_elec_prev_prv <- rain_data$rain_elec_prev[rain_data$prior_obs]
  covs$rain_weekend_prev_prv <- rain_data$rain_weekend_prev[rain_data$prior_obs]

  # one lead
  covs$rain_elec_next <- rain_data$rain_elec[rain_data$next_obs]
  covs$rain_day_next <- rain_data$rain_day[rain_data$next_obs]
  covs$rain_weekend_next <- rain_data$rain_weekend[rain_data$next_obs]

  # alternative rain measures, only when <ghk> is switched on
  if (ghk == TRUE) {
    covs$rain_elec00 <- rain_data$rain_elec00
    covs$rain_elec00_prev <- rain_data$rain_elec00[rain_data$prior_obs]
    covs$rain_ghk <- rain_data$RD_ghk_rain_dev
    covs$rain_ghk_prev <- covs$rain_ghk[rain_data$prior_obs]
  }
}
        
# assign fixed effects variable
#  fe_id_num holds the factor level codes of the grouping variable selected
#  by <fes.type>; it is left undefined for any other value of fes.type
if (fes.type == 1) {
  fe_id_num <- as.numeric(as.factor(rain_data$decade_district))
} else if (fes.type == 2) {
  fe_id_num <- as.numeric(as.factor(rain_data$decade_icpsr))
} else if (fes.type == 3) {
  fe_id_num <- as.numeric(as.factor(rain_data$icpsr))
}

# covariate matrices
#  covs_none first receives columns 29 to 36 of covs (selected by position);
#  every table then gets the fixed-effect id appended as a factor column
covs_none <- cbind(covs_none, covs[, 29:36])
covs_all <- cbind(covs_all, as.factor(fe_id_num))
covs_some <- cbind(covs_some, as.factor(fe_id_num))
covs_none <- cbind(covs_none, as.factor(fe_id_num))
covs <- cbind(covs, as.factor(fe_id_num))


# produce subsets;
#  -- full: main results
#  -- demx, repx: results by party of incumbent
#  -- extr, modr: results for safe or at-risk incumbents

# standardize dist_prev within year: subtract the year mean and divide by the
# year standard deviation (dist_prev_var stores an sd, not a variance)
dist_prev <- covs_all$dist_prev
dist_prev_var <- dist_prev_avg <- array(NA, length(dist_prev))
un_year <- unique(rain_data$year)
for (i in seq_along(un_year)) {
  inds <- which(rain_data$year == un_year[i])
  dist_prev_avg[inds] <- mean(dist_prev[inds], na.rm = TRUE)
  dist_prev_var[inds] <- sd(dist_prev[inds], na.rm = TRUE)
}

dprv <- (dist_prev - dist_prev_avg) / dist_prev_var

# safe v. competitive :: based on presidential vote...
#  difDpres of the linked prior observation; more than .075 in absolute
#  value is flagged as extreme
d_prv <- rain_data$difDpres[rain_data$prior_obs]
extreme <- abs(d_prv) > .075

# main sample: after 1954, rain_data$lower set, and r_inc differing from d_inc
full <- (rain_data$year > 1954 & rain_data$lower & rain_data$r_inc != rain_data$d_inc)
   
###############
# additional stratifications for robustness checks
#  each switch removes observations from the logical vector `full`

# a. vote by early/absentee
#  switch = 1 :: drop observations whose *_noexcuse indicator equals 1
#  switch = 0 :: drop observations whose *_noexcuse indicator equals 0
#  any other value leaves `full` untouched

if (early == 1) {
  or <- rain_data$early_noexcuse == 1
  full[which(or)] <- FALSE
} else if (early == 0) {
  or <- rain_data$early_noexcuse == 0
  full[which(or)] <- FALSE
}

if (absentee == 1) {
  or <- rain_data$absentee_noexcuse == 1
  full[which(or)] <- FALSE
} else if (absentee == 0) {
  or <- rain_data$absentee_noexcuse == 0
  full[which(or)] <- FALSE
}


# b. oregon vote by mail
#  or_pre = 0 :: drop everything except oregon with year >= 2000
#  or_pre = 1 :: drop everything except oregon with year <  2000
if (or_pre == 0) {
  full[which(rain_data$year < 2000 | rain_data$state != 'OR')] <- FALSE
} else if (or_pre == 1) {
  full[which(rain_data$year >= 2000 | rain_data$state != 'OR')] <- FALSE
}

# c. battleground states
#  battlestates is built (1 = battleground state-year, 0 = otherwise) whenever
#  <battle> is not -1, and then applied to `full` for battle = 1 or battle = 0

if (battle != -1) {

  # indicator for battleground states
  #  column 1 is matched against rain_data$year and column 2 against
  #  rain_data$state further down
  battleground <- read.csv('battleground/battlegrounds.csv', stringsAsFactors = FALSE, header = FALSE)

  # remove a single trailing blank from the state name, where present
  battleground[, 2] <- sub(' $', '', battleground[, 2])

  # replace each state name found in column 1 of the lookup table with the
  # value in column 2 (presumably the postal abbreviation -- TODO confirm)
  abbrev <- read.table('battleground/state_abbrev.txt', sep = '\t', header = TRUE)
  abbrev[, 1] <- as.character(abbrev[, 1])
  abbrev[, 2] <- as.character(abbrev[, 2])
  for (j in seq_len(nrow(abbrev))) {
    inds <- which(battleground[, 2] == abbrev[j, 1])
    battleground[inds, 2] <- abbrev[j, 2]
  }

  # flag every row of rain_data whose year and state appear in the table
  battlestates <- array(0, nrow(rain_data))
  for (i in seq_len(nrow(battleground))) {
    ix <- which(rain_data$year == battleground[i, 1] & rain_data$state == battleground[i, 2])
    battlestates[ix] <- 1
  }
}

# include battleground stratification into full
#  battle = 1 keeps battleground rows, battle = 0 keeps the others
if (battle == 1) {
  full[which(battlestates == 0)] <- FALSE
} else if (battle == 0) {
  full[which(battlestates == 1)] <- FALSE
}
     
   
# addendum :: additional measure for # terms in office => final seniority results
# 	- will be easier to produce <terms> then stratify on terms in full
#  also add :: safeness x party slider ...

noms <- read.csv('nominate/hou_nom_icpsr.csv', stringsAsFactors = FALSE)
icpsr <- rain_data$icpsr
year <- rain_data$year
cong <- rain_data$cong

un_icpsr <- unique(rain_data$icpsr[which(!is.na(rain_data$icpsr))])

# terms is filled by row of rain_data, so it is sized to nrow(rain_data)
# (it used to be sized by the number of members and grown implicitly)
terms <- array(NA, nrow(rain_data))
sns <- c()

# base[i]: number of rows in noms for member i whose first column is smaller
# than the congress of that member's first row in rain_data
base <- array(NA, length(un_icpsr))
for (i in seq_along(un_icpsr)) {
  ix <- which(rain_data$icpsr == un_icpsr[i])
  iy <- which(noms[, 2] == un_icpsr[i])
  base[i] <- length(which(cong[ix[1]] > noms[iy, 1]))

  # number the member's rows 1, 2, ... in file order on top of base[i]
  # NOTE(review): assumes a member's rows appear in chronological order
  terms[ix] <- seq_along(ix) + base[i]
}

# stratification for seniority results
#  only the first element of <seniority> selects the branch that runs
if (seniority[1] > 0) {

  # limit to just freshmen: rows whose term count equals 1
  if (seniority[1] == 1) {
    senior <- which(terms == 1)
    senior_prv <- array(NA, length(senior))

    # for every freshman row, look for the one row with the same state and
    # district (cds) two years earlier; left NA unless exactly one matches
    for (i in seq_along(senior)) {
      ix <- which(rain_data$cds[senior[i]] == rain_data$cds &
        rain_data$state[senior[i]] == rain_data$state &
        (rain_data$year[senior[i]] - 2) == rain_data$year)
      if (length(ix) == 1) {
        senior_prv[i] <- ix
      }
    }

    # drop every non-freshman row; the complement is built explicitly because
    # a negative index made from an empty `senior` would select no rows at all
    full[setdiff(seq_along(full), senior)] <- FALSE

    # ndx maps each freshman row to its district's previous row (NA elsewhere);
    # an earlier assignment of ndx from prior_obs/ndex was always overwritten
    # here and has been removed
    ndx <- array(NA, nrow(rain_data))
    ndx[senior] <- senior_prv

    # rebuild the prior-outcome columns from the district's previous row
    covs[, "dose_prv"] <- covs[, "dose"][ndx]
    covs[, "vote_prv"] <- covs[, "vote"][ndx]

    covs[, "rain_elec_prev"] <- covs[, "rain_elec"][ndx]
    covs[, "rain_day_prev"] <- covs[, "rain_day"][ndx]
    covs[, "rain_weekend_prev"] <- covs[, "rain_weekend"][ndx]

    # these read the *_prev columns just rewritten above, so order matters
    covs[, "rain_elec_prev_prv"] <- covs[, "rain_elec_prev"][ndx]
    covs[, "rain_day_prev_prv"] <- covs[, "rain_day_prev"][ndx]
    covs[, "rain_weekend_prev_prv"] <- covs[, "rain_weekend_prev"][ndx]
  }

  if (seniority[1] > 1) {
    # rows whose term count differs from every value in <seniority>;
    # rows with a missing term count are never selected here
    iq <- Reduce(intersect, lapply(seniority, function(s) which(terms != s)))
    full[iq] <- FALSE
  }
}
   	   
 
# final subsets, stored as row indices into rain_data
#  full: the main sample after all stratifications applied so far
#  demx / repx: post-1954 rows with rain_data$lower set and d_inc == 1 / r_inc == 1
#  modr / extr: post-1954 rows with lower == 1, split on the `extreme` flag
full <- which(full)

demx <- which(rain_data$year > 1954 & rain_data$lower & rain_data$d_inc == 1)
repx <- which(rain_data$year > 1954 & rain_data$lower & rain_data$r_inc == 1)

modr <- which(rain_data$year > 1954 & rain_data$lower == 1 & extreme == FALSE)
extr <- which(rain_data$year > 1954 & rain_data$lower == 1 & extreme == TRUE)
	                 
# END >>>> next files